#delimit ;

/* Session setup: commands from here on end with a semicolon, output
   paging is switched off, and a plain-text log is opened for this run */;
set more off;

/* Close any log left open by an earlier, interrupted run. Without this,
   the -log using- below stops with "log file already open". -capture-
   swallows the error that -log close- raises when no log is open */;
capture log close;

set logtype text;
log using "/home/dnc2101/Accidental_Deaths/Vital_Stats/do_files_other_years/logload1994.txt", replace;

/*------------------------------------------------
  This program reads the 1994 NCHS Multiple Cause of Death Data File
  by Jean Roth Mon Aug 14 15:53:44 EDT 2006
  Please report errors to jroth@nber.org
  NOTE:  This program is distributed under the GNU GPL.
  See end of this file and http://www.gnu.org/licenses/ for details.
  Run with do mort1981
----------------------------------------------- */;

/* Input and output locations for this run. Edit the three locals below
   to point at your own copies of the files. On a PC, write paths with
   backslashes, as in C:\  */;

/* Path to the data dictionary (.dct) file */;
local dct_name "/home/dnc2101/Accidental_Deaths/Vital_Stats/DCT_Files/mort1994.dct";

/* Complete path and name of the raw data file */;
local dat_name "/home/dnc2101/Accidental_Deaths/Vital_Stats/DAT_Files/Mort94.dat";

/* Path to the output '.dta' file, written here without the extension */;
local dta_name "/home/dnc2101/Accidental_Deaths/Vital_Stats/dta_files_other_years/mort1994";

/* Read the raw data file through the dictionary, replacing whatever
   data is currently in memory */;
infile using "`dct_name'",
    clear
    using("`dat_name'");


/*
Copyright 2006 shared by the National Bureau of Economic Research and Jean Roth

National Bureau of Economic Research.
1050 Massachusetts Avenue
Cambridge, MA 02138
jroth@nber.org

This program and all programs referenced in it are free software. You
can redistribute the program or modify it under the terms of the GNU
General Public License as published by the Free Software Foundation;
either version 2 of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
USA.
*/;


/* Shrink each variable to the smallest storage type that still holds its
   values, which reduces memory use and the size of the saved files */;
compress;

/* Keep only injury related deaths: records whose ucr282 value lies
   between 29900 and 35800 inclusive. Observations with a missing ucr282
   are dropped as well, because a missing value fails the upper-bound
   comparison */;
keep if (ucr282>=29900 & ucr282<=35800);

/* Save the smaller file in Stata format. The destination is taken from
   the dta_name local defined near the top of this file, so the output
   path is maintained in one place. That local carries no extension and
   -save- appends .dta to it */;
save "`dta_name'", replace;


** now keep only the variables I intend to use and save a file for appending onto the other files;
** Running list and description of new variables that get added to the data in years after 1981 - ;
** FIPS state and county codes for place of occurrence of death and place of residence of decedent are now in the data starting in 1982;
** the variables and their definitions are the following - ;
** fipsctyo, county occurrence, fipssto, state occurrence, fipsctyr, county residence, fipsstr, state residence, fipssmsa, metropolitan statistical area of residence - fipssmsa was actually in the 1981 data;

** variable called "origin" enters data in 1983 - starting in 1984 values are actually entered for this variable, unlike in 1983 when the variable is present but data on it is missing for all observations... p. 30 of the 1984 Codebook pdf describes the variable - you can get Hispanic origin from it, as well as a bunch of useless info on what country in Europe a white decedent's ancestors were from and so forth;

** as in 1983, columns for occupation and industry variables are in the data, and starting with 1985, ;
** data for occupation and industry begins actually being entered here;


** starting in 1989 - "hospstat" is now called "placdth", and "hispanic" and "hspanicr" have replaced "origin" - see p. 36 of the pdf for definitions of these variables;
** also starting in 1989... "popsize" had been in the data and is "population size of city of residence", but now the data has this info as well as the population of the county of occurrence, the county of residence, and the SMSA... see p. 18 of the Codebook for the 1989 data... if you read further in this Codebook you find information confirming that "popsize" is the same variable that it always was, population size of city of residence;


** starting in 1990 - "fipspmsa" and "fipscmsa" are in the data now, rather than "fipssmsa" as in previous years...;
** I have decided to just keep both variables in the data, although I wasn't really planning on using either... see also p. 43 of the 1990 Codebook pdf, I think fipspmsa is comparable to the old fipssmsa as the CMSA was a new creation in 1990;
** also starting in 1990 - "racer2" has been replaced by "racer5" - these are Race Recode variables, but in this project we do not differentiate between death rates by race ;   

** starting in 1991 - a change from 1990 is that now only "fipspmsa" is in the data instead of both it and "fipscmsa";
** also starting in 1991, "hispanr", a new Hispanic origin indicator, now appears in the data;  
** final change for 1991 is that there are new age recode variables in the data - "ager52", "ager20", and "ager12";

** starting in 1992 - "racer5" is replaced by "racer4" - these are race recode variables;
** also starting in 1992 - "raceimp", a race imputation flag, enters the data - see the 1992 Codebook for more detail on it;

** starting in 1993 - variable for if injury occurred at work, "injwork", enters the data;



/* Reduce the extract to the variables intended for later use. The list
   is wrapped across lines in related groups and is still one command */;
keep
    datayear rectype restatus
    countyoc stateoc countyrs staters
    popsize metro monthdth
    sex race racer3 racer4 raceimp
    hispanr hispanic hspanicr
    age ager52 ager20 ager12
    placdth marstat accident injwork
    fipsctyo fipssto fipsctyr fipsstr fipspmsa
    ucod ucr282 ucr72 ucr61 ucr34;

/* Save the short file that is meant for appending onto the other files */;
save "/home/dnc2101/Accidental_Deaths/Vital_Stats/Data_For_Appending/short_mort1994.dta", replace;

/* Close the log for this run */;
log close;

